library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)## Warning: package 'plotly' was built under R version 3.4.4
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggmap)## Warning: package 'ggmap' was built under R version 3.4.4
##
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
##
## wind
library(ggplot2)
library(highcharter)## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(readr)
historical_web_data = readRDS("data\\historical_web_data_26112015.rds")
disaster = read_delim("data\\disaster.txt", "\t", escape_double = FALSE, trim_ws = TRUE)## Parsed with column specification:
## cols(
## .default = col_integer(),
## FLAG_TSUNAMI = col_character(),
## SECOND = col_character(),
## EQ_PRIMARY = col_double(),
## EQ_MAG_MW = col_double(),
## EQ_MAG_MS = col_double(),
## EQ_MAG_MB = col_character(),
## EQ_MAG_ML = col_double(),
## EQ_MAG_MFA = col_character(),
## EQ_MAG_UNK = col_double(),
## COUNTRY = col_character(),
## STATE = col_character(),
## LOCATION_NAME = col_character(),
## LATITUDE = col_double(),
## LONGITUDE = col_double(),
## MISSING = col_character(),
## DAMAGE_MILLIONS_DOLLARS = col_character(),
## TOTAL_MISSING = col_character(),
## TOTAL_MISSING_DESCRIPTION = col_character(),
## TOTAL_DAMAGE_MILLIONS_DOLLARS = col_character()
## )
## See spec(...) for full column specifications.
iran_earthquake = readRDS("data\\iran_earthquake.rds")
iran_map = read_rds("data\\Tehrn_map_6.rds")
worldwide = read.csv("data\\worldwide.csv")
plot1 <- plot_ly(historical_web_data, x = ~Latitude, y = ~Longitude, z = ~Depth, color = ~Magnitude, sizes = c(5, 550), size = ~Magnitude)
plot1## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: package 'bindrcpp' was built under R version 3.4.4
disaster %>% filter(FLAG_TSUNAMI == "Tsu") %>%
rename(lat = LATITUDE,lon = LONGITUDE, z = EQ_PRIMARY,name = COUNTRY,sequence = YEAR) %>%
select(lat, lon, name, z) -> dis
hcmap() %>%
hc_add_series(data = dis, type = "mapbubble",
minSize = 0, maxSize = 30) %>%
hc_plotOptions(series = list(showInLegend = FALSE))
iran_big_earthquakes <- iran_earthquake %>% filter(Mag> 5)
#View(iran_big_earthquakes)
ggmap(iran_map) + geom_point(aes(x = Long, y = Lat, size = Mag), data = iran_big_earthquakes, alpha = .5, color="darkred") + geom_density_2d()## Warning: Removed 1 rows containing missing values (geom_point).
firstup <- function(x) {
substr(x, 1, 1) <- toupper(substr(x, 1, 1))
x
}
disaster %>% filter(is.na(FLAG_TSUNAMI)) %>% rowwise() %>% mutate(cCOUNTRY = firstup(tolower(COUNTRY))) %>% group_by(cCOUNTRY) %>% summarise(LATITUDE = mean(LATITUDE, na.rm = T), LONGITUDE = mean(LONGITUDE, na.rm = T), numbers = n(), death_sum = sum(DEATHS, na.rm = T)) %>% rowwise() %>% mutate(meann = death_sum/numbers) -> data5## Warning: Grouping rowwise data frame strips rowwise nature
View(data5)
hcmap(data = data5, value = "death_sum", joinBy = c("name", "cCOUNTRY"))
disaster %>% filter(is.na(FLAG_TSUNAMI)) %>% select(id = I_D, lat = LATITUDE, long = LONGITUDE, death = DEATHS, foc = FOCAL_DEPTH, intensity = INTENSITY) -> data6
fit = lm(death ~lat + long + foc + intensity, data = data6)
print(summary(fit))##
## Call:
## lm(formula = death ~ lat + long + foc + intensity, data = data6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11822 -5647 -3353 500 230352
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12551.48 4616.81 -2.719 0.006867 **
## lat 22.67 46.61 0.486 0.626998
## long 23.33 12.78 1.825 0.068784 .
## foc -26.86 30.18 -0.890 0.374166
## intensity 1994.26 549.01 3.632 0.000321 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17890 on 366 degrees of freedom
## (3861 observations deleted due to missingness)
## Multiple R-squared: 0.05365, Adjusted R-squared: 0.0433
## F-statistic: 5.187 on 4 and 366 DF, p-value: 0.0004487
We can see the coefficients in the summary of the fit model. ***
View(worldwide)
depth_worldwide = worldwide$depth
mag_worldwide= worldwide$mag
cor.test(depth_worldwide, mag_worldwide)##
## Pearson's product-moment correlation
##
## data: depth_worldwide and mag_worldwide
## t = 50.03, df = 60629, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1914584 0.2067469
## sample estimates:
## cor
## 0.1991147
As we can see the P_value for H0 (correlation between depth and mag is 0) is very low. So we can reject the null hypothesis and conclude that there is a correlation between depth and magnitude.
View(disaster)
library(stringr)## Warning: package 'stringr' was built under R version 3.4.4
View(worldwide)
worldwide %>% rowwise() %>% mutate(year = unlist(str_split(time, "-"))[1], country = unlist(str_split(place, ","))[2]) %>% filter(type == "earthquake") %>% group_by(country, year) %>% summarise(numbers = n()) %>% group_by(country) %>% summarise(summs = sum(numbers), nn = n(), mean_by_year = sum(numbers)/n()) %>% arrange(-mean_by_year) %>% slice(1:60) -> data9## Warning: Grouping rowwise data frame strips rowwise nature
data9 %>% ggplot() + geom_bar(aes(x = reorder(country, mean_by_year), y = mean_by_year), stat = "identity") + theme(axis.text.x = element_text(angle = 90))
- We check if there is a relation between mag and gap of an earthquake.
gap_worldwide = worldwide$gap
mag_worldwide= worldwide$mag
cor.test(gap_worldwide, mag_worldwide)##
## Pearson's product-moment correlation
##
## data: gap_worldwide and mag_worldwide
## t = -94.188, df = 54620, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3809926 -0.3665636
## sample estimates:
## cor
## -0.3738007
We can see there is a correlation between gap and magnitude.
Now we will check if the mean number of deaths in a earthquake and a tsunami is the same:
Tsunami <- disaster %>% filter(FLAG_TSUNAMI == "Tsu") %>% select(DEATHS) %>% as.vector() %>% filter(is.na(DEATHS) == F)
earthQuake <-Tsunami <- disaster %>% filter(is.na(FLAG_TSUNAMI) == T) %>% select(DEATHS) %>% as.vector() %>% filter(is.na(DEATHS) == F)
p = data.frame(Tsunami = Tsunami$DEATHS, earthQuake = earthQuake$DEATHS)
aov (Tsunami ~ earthQuake, data = p) -> q
summary(q)## Df Sum Sq Mean Sq F value Pr(>F)
## earthQuake 1 1.061e+12 1.061e+12 6.851e+32 <2e-16 ***
## Residuals 1588 0.000e+00 0.000e+00
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
According to the P-value, we can say the means of deaths of earthquakes and Tsunamies are different.
And now we will see if there is a relation between magnitude and its error:
err_worldwide = worldwide$magError
mag_worldwide= worldwide$mag
cor.test(err_worldwide, mag_worldwide)##
## Pearson's product-moment correlation
##
## data: err_worldwide and mag_worldwide
## t = -25.645, df = 48514, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1244214 -0.1068628
## sample estimates:
## cor
## -0.1156511
And as we can see, there is a relation between these two parameters.